eReefs_nc <- ncdf4::nc_open("eReefs_assessment.nc")lat = ncdf4::ncvar_get(eReefs_nc, "latitude")long = ncdf4::ncvar_get(eReefs_nc, "longitude")time = ncdf4::ncvar_get(eReefs_nc, "time")tunits = ncdf4::ncatt_get(eReefs_nc, "time", "units")cf = CFtime::CFtime(tunits$value, calendar ="standard", time) timestamps = CFtime::as_timestamp(cf) timestamps =as.Date(timestamps, format ="%Y-%m-%d") # explanatory variables temp = ncdf4::ncvar_get(eReefs_nc, "temp")salt = ncdf4::ncvar_get(eReefs_nc, "salt")total_nitrogen = ncdf4::ncvar_get(eReefs_nc, "TOTAL_NITROGEN")ph = ncdf4::ncvar_get(eReefs_nc, "PH")macroalgae = ncdf4::ncvar_get(eReefs_nc, "MA_N")# convert extracted data into a data frameeReefs_df =expand.grid(long = long, lat = lat, time = timestamps) %>%mutate(temp =as.vector(temp),salt =as.vector(salt),total_nitrogen =as.vector(total_nitrogen),ph =as.vector(ph),macroalgae =as.vector(macroalgae) )# remove rows where all environmental variables are NAeReefs_df = eReefs_df %>%filter(!is.na(temp) |!is.na(salt) |!is.na(total_nitrogen) |!is.na(ph))ncdf4::nc_close(eReefs_nc)head(eReefs_df)
Itmp & manta dataset
Code
ltmp_data <-read_csv("ltmp_hc_sc_a_by_site.csv")
Rows: 18540 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): SECTOR, SHELF, REEF_NAME, REEF_ID, GROUP_CODE
dbl (6): SITE_NO, LATITUDE, LONGITUDE, VISIT_NO, YEAR_CODE, COVER
date (1): SAMPLE_DATE
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# filter & clean Bleaching Surveys bleaching_surveys_filtered <- bleaching_surveys %>%mutate(sample_date =as.Date(surveyDate),year =year(sample_date)) %>%filter( year >=2015& year <=2017, latitude >=-24& latitude <=-9, longitude >=142& longitude <=155 ) %>%mutate(Zone =case_when( latitude <=-9& latitude >-16~"Northern GBR", latitude <=-16& latitude >-21~"Central GBR", latitude <=-21& latitude >=-24~"Southern GBR",TRUE~NA_character_ ) )# filter & clean LTMP datasetltmp_data_filtered <- ltmp_data %>%filter( SAMPLE_DATE >=as.Date("2015-01-01") & SAMPLE_DATE <=as.Date("2017-12-31"), LATITUDE >=-24& LATITUDE <=-9, LONGITUDE >=142& LONGITUDE <=155 ) %>%mutate(Zone =case_when( LATITUDE <=-9& LATITUDE >-16~"Northern GBR", LATITUDE <=-16& LATITUDE >-21~"Central GBR", LATITUDE <=-21& LATITUDE >=-24~"Southern GBR",TRUE~NA_character_ ) )# filter & clean MANTA dataset manta_data_filtered <- manta_data %>%filter( SAMPLE_DATE >=as.Date("2015-01-01") & SAMPLE_DATE <=as.Date("2017-12-31"), LATITUDE >=-24& LATITUDE <=-9, LONGITUDE >=142& LONGITUDE <=155 ) %>%mutate(Zone =case_when( LATITUDE <=-9& LATITUDE >-16~"Northern GBR", LATITUDE <=-16& LATITUDE >-21~"Central GBR", LATITUDE <=-21& LATITUDE >=-24~"Southern GBR",TRUE~NA_character_ ) )
# filter eReefs data to GBR region and 2015–2017ereefs_data_filtered <- eReefs_df %>%filter( lat >=-24& lat <=-9, long >=142& long <=155, time >=as.Date("2015-01-01") & time <=as.Date("2017-12-31") ) %>%mutate(Year = lubridate::year(time),Zone =case_when( lat <=-9& lat >-16~"Northern GBR", lat <=-16& lat >-21~"Central GBR", lat <=-21& lat >=-24~"Southern GBR",TRUE~NA_character_ ) )dim(ereefs_data_filtered)head(ereefs_data_filtered)
Code
dim(ereefs_data_filtered)
[1] 267834 10
Code
dim(bleaching_surveys_filtered)
[1] 7608 12
Code
dim(ltmp_data_filtered)
[1] 1764 13
EDA
Distribution of Bleaching Severity (Overall)
Code
ggplot(bleaching_surveys_filtered, aes(x =factor(bleachCat))) +geom_bar(fill ="steelblue") +labs(title ="Distribution of Bleaching Severity",x ="Bleaching Category", y ="Count") +theme_minimal()
Mean Environmental Variables by Zone
Code
ereefs_data_filtered %>%group_by(Zone) %>%summarise(mean_temp =mean(temp, na.rm =TRUE)) %>%ggplot(aes(x = Zone, y = mean_temp, fill = Zone)) +geom_col(show.legend =FALSE) +labs(title ="Mean Sea Surface Temperature by GBR Zone", y ="SST (°C)", x ="") +theme_minimal()
Coral Cover by Zone (LTMP)
Code
ggplot(ltmp_data_filtered, aes(x = Zone, y = COVER, fill = Zone)) +geom_boxplot(show.legend =FALSE) +labs(title ="Distribution of Coral Cover by Zone", y ="Coral Cover (%)") +theme_minimal()
Exploratory Visualizations
Map of Survey Locations
Code
# australia coastlineoz <-map_data("worldHires", region ="Australia")p_map <-ggplot() +geom_polygon(data = oz, aes(x = long, y = lat, group = group), fill ="gray90", color =NA) +# bleaching datageom_point(data = bleaching_surveys_filtered,aes(x = longitude, y = latitude, color = bleachCat),alpha =0.8, size =2.5) +# horizontal red lines separating zonesgeom_hline(yintercept =c(-16, -21), color ="red", linetype ="dashed", size =1) +# color scale: higher = brighter, lower = darker (standard viridis)scale_color_viridis_c(name ="Bleaching Category", option ="viridis") +coord_fixed(xlim =c(142, 155), ylim =c(-25, -9)) +labs(title ="Coral Bleaching Severity (2015–2017) with GBR Zone Boundaries",x ="Longitude", y ="Latitude") +theme_minimal(base_size =14) +theme(legend.position ="right",plot.title =element_text(face ="bold", hjust =0.5),panel.grid =element_line(color ="gray90") )
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
for 'Coral Bleaching Severity (2015–2017) with GBR Zone Boundaries' in
'mbcsToSbcs': - substituted for – (U+2013)
Bleaching by Year and Zone
Code
p_prop <-ggplot(bleaching_surveys_filtered, aes(x =factor(year), fill =factor(bleachCat))) +geom_bar(position ="fill") +facet_wrap(~Zone) +labs(title ="Proportion of Bleaching Severity by Year and Zone (2015–2017)",y ="Proportion",x ="Year",fill ="Bleaching Severity" ) +scale_fill_viridis_d(option ="viridis", name ="Bleaching Category") +theme_minimal(base_size =14) +theme(plot.title =element_text(face ="bold", hjust =0.5),legend.position ="right" )p_prop
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
for 'Proportion of Bleaching Severity by Year and Zone (2015–2017)' in
'mbcsToSbcs': - substituted for – (U+2013)
Environmental Trends (eReefs) Over Time
Code
p_sst <- ereefs_data_filtered %>%group_by(Year, Zone) %>%summarise(mean_temp =mean(temp, na.rm =TRUE)) %>%ggplot(aes(x = Year, y = mean_temp, color = Zone)) +geom_line(size =1.2) +labs(title ="Mean SST by Zone (2015–2017)", y ="Sea Surface Temp (°C)") +theme_minimal()
`summarise()` has grouped output by 'Year'. You can override using the
`.groups` argument.
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
for 'Mean SST by Zone (2015–2017)' in 'mbcsToSbcs': - substituted for –
(U+2013)
Dataset merging
Code
bleaching_surveys_filtered <- bleaching_surveys_filtered %>%mutate(surveyDate =as.Date(surveyDate))# convert both datasets to sf objects (Spatial Features)bleach_sf <-st_as_sf(bleaching_surveys_filtered, coords =c("longitude", "latitude"), crs =4326)ereefs_sf <-st_as_sf(ereefs_data_filtered, coords =c("long", "lat"), crs =4326)# spatial join within 1000 metersjoined_sf <-st_join(bleach_sf, ereefs_sf, join = st_is_within_distance, dist =5000, left =TRUE)# filter for rows where years matchjoined_filtered <- joined_sf %>%mutate(bleach_year =year(surveyDate),env_year = Year ) %>%filter(bleach_year == env_year)# convert back to data frame for further analysis or modelingjoined_df <- joined_filtered %>%as.data.frame()dim(joined_df)head(joined_df)
# extract lat/lon from geometry column if not sf anymorejoined_df <- joined_df %>%mutate(longitude =as.numeric(gsub("c\\(([^,]+),.*", "\\1", geometry)),latitude =as.numeric(gsub("c\\([^,]+,\\s*([^\\)]+)\\)", "\\1", geometry)) )# create GBR region labelsjoined_df <- joined_df %>%mutate(region =case_when( latitude <=-9& latitude >-16~"Northern GBR", latitude <=-16& latitude >-21~"Central GBR", latitude <=-21& latitude >=-24~"Southern GBR",TRUE~NA_character_ )) %>%filter(!is.na(region))# plot temperature vs bleachCat by regionp1 <-ggplot(joined_df, aes(x = temp, y =as.numeric(as.character(bleachCat)), color = bleachCat)) +geom_jitter(alpha =0.4, size =1.5) +geom_smooth(method ="lm", se =FALSE, color ="blue") +facet_wrap(~ region, ncol =3) +labs(title ="Relationship between Temperature and Coral Bleaching Category",subtitle ="By Region of the Great Barrier Reef",x ="Sea Surface Temperature (°C)",y ="Bleaching Category (0–4)" ) +theme_minimal()# plot total_nitrogen vs bleachCat by regionp2 <-ggplot(joined_df, aes(x = total_nitrogen, y =as.numeric(as.character(bleachCat)), color = bleachCat)) +geom_jitter(alpha =0.4, size =1.5) +geom_smooth(method ="lm", se =FALSE, color ="blue") +facet_wrap(~ region, ncol =3) +labs(title ="Relationship between Total Nitrogen and Coral Bleaching Category",subtitle ="By Region of the Great Barrier Reef",x ="Total Nitrogen (µmol/L)",y ="Bleaching Category (0–4)" ) +theme_minimal()print(p1)
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
for 'Bleaching Category (0–4)' in 'mbcsToSbcs': - substituted for – (U+2013)
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
for 'Bleaching Category (0–4)' in 'mbcsToSbcs': - substituted for – (U+2013)
Predictive Modeling
Code
# select features and target features <-c("temp", "salt", "total_nitrogen", "ph", "macroalgae", "depth")target <-"bleachCat"# prepare modeling dataframe modeling_df <- joined_df[, c(features, target)]modeling_df <- modeling_df[complete.cases(modeling_df), ]# fill missing values with column meansmodeling_df[features] <-lapply(modeling_df[features], function(x) {ifelse(is.na(x), mean(x, na.rm =TRUE), x)})# make sure target is a factor (for classification)modeling_df$bleachCat <-as.factor(modeling_df$bleachCat)# 80/20 train-test splitset.seed(3888)train_index <-createDataPartition(modeling_df$bleachCat, p =0.8, list =FALSE)train_data <- modeling_df[train_index, ]test_data <- modeling_df[-train_index, ]cat("Train size:", nrow(train_data), "\n")
# ensure bleachCat is numeric for ordinal SVMtrain_data$bleachCat_ord <-as.numeric(as.character(train_data$bleachCat))test_data$bleachCat_ord <-as.numeric(as.character(test_data$bleachCat))svm_model <-svm(bleachCat_ord ~ ., data = train_data[, c(features, "bleachCat_ord")])svm_preds <-round(predict(svm_model, test_data[, features]))svm_acc <-mean(svm_preds == test_data$bleachCat_ord)cat("Ordinal SVM Accuracy:", round(svm_acc, 3), "\n")
Ordinal SVM Accuracy: 0.379
Ordinal Decision Tree
Code
# ensure bleachCat is an ordered factor train_data$bleachCat <-factor(train_data$bleachCat, ordered =TRUE)test_data$bleachCat <-factor(test_data$bleachCat, ordered =TRUE)# fit an ordinal-aware decision tree using ctree tree_model <-ctree(bleachCat ~ temp + salt + total_nitrogen + ph + macroalgae + depth,data = train_data)# predict on the test set tree_preds <-predict(tree_model, newdata = test_data)# convert to same type and compute accuracy tree_acc <-mean(as.character(tree_preds) ==as.character(test_data$bleachCat))cat("Ordinal Decision Tree (ctree) Accuracy:", round(tree_acc, 3), "\n")
Warning in levels(reference) != levels(data): longer object length is not a
multiple of shorter object length
Warning in confusionMatrix.default(factor(vglm_class),
factor(as.numeric(as.character(test_data$bleachCat)))): Levels are not in the
same order for reference and data. Refactoring data to match.
Confusion Matrix and Statistics
Reference
Prediction 0 1 2 3 4
0 125 57 41 82 22
1 0 0 0 0 0
2 0 0 0 0 0
3 64 18 41 51 34
4 0 0 0 0 0
Overall Statistics
Accuracy : 0.329
95% CI : (0.2893, 0.3706)
No Information Rate : 0.3533
P-Value [Acc > NIR] : 0.8894
Kappa : 0.0239
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: 0 Class: 1 Class: 2 Class: 3 Class: 4
Sensitivity 0.6614 0.0000 0.0000 0.38346 0.0000
Specificity 0.4162 1.0000 1.0000 0.60945 1.0000
Pos Pred Value 0.3823 NaN NaN 0.24519 NaN
Neg Pred Value 0.6923 0.8598 0.8467 0.74924 0.8953
Prevalence 0.3533 0.1402 0.1533 0.24860 0.1047
Detection Rate 0.2336 0.0000 0.0000 0.09533 0.0000
Detection Prevalence 0.6112 0.0000 0.0000 0.38879 0.0000
Balanced Accuracy 0.5388 0.5000 0.5000 0.49646 0.5000
# STEP 3: Plot temperature vs bleachCat by regionp1 <-ggplot(joined_df, aes(x = temp, y =as.numeric(as.character(bleachCat)), color = bleachCat)) +geom_jitter(alpha =0.4, size =1.5) +geom_smooth(method ="lm", se =FALSE, color ="blue") +facet_wrap(~ region, ncol =3) +labs(title ="Relationship between Temperature and Coral Bleaching Category",subtitle ="By Region of the Great Barrier Reef",x ="Sea Surface Temperature (°C)",y ="Bleaching Category (0–4)" ) +theme_minimal()# STEP 4: Plot total_nitrogen vs bleachCat by regionp2 <-ggplot(joined_df, aes(x = total_nitrogen, y =as.numeric(as.character(bleachCat)), color = bleachCat)) +geom_jitter(alpha =0.4, size =1.5) +geom_smooth(method ="lm", se =FALSE, color ="blue") +facet_wrap(~ region, ncol =3) +labs(title ="Relationship between Total Nitrogen and Coral Bleaching Category",subtitle ="By Region of the Great Barrier Reef",x ="Total Nitrogen (µmol/L)",y ="Bleaching Category (0–4)" ) +theme_minimal()print(p1)
`geom_smooth()` using formula = 'y ~ x'
Code
print(p2)
`geom_smooth()` using formula = 'y ~ x'
Model/plot bleaching vs aggregated environmental exposure
Code
p_temp_bleach <-ggplot(ereefs_future_summary, aes(x = mean_temp, y = max_bleach, color = region)) +geom_jitter(width =0.2, height =0.2, alpha =0.5) +geom_smooth(method ="loess", se =FALSE) +facet_wrap(~ region) +labs(title ="Mean Temperature vs Max Predicted Bleaching (2018-2020)",x ="Mean Temperature (°C)",y ="Max Predicted Bleaching" ) +theme_minimal()p_temp_bleach
Warning: There were 3 warnings in `summarise()`.
The first warning was:
ℹ In argument: `across(...)`.
ℹ In group 1: `region = Northern GBR`.
Caused by warning in `cor()`:
! the standard deviation is zero
ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.
Code
print(region_cor_df)
# A tibble: 3 × 7
region temp salt total_nitrogen ph macroalgae depth
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Northern GBR 0.732 0.350 -0.656 -0.725 0.203 NA
2 Central GBR 0.565 0.00569 -0.430 -0.464 0.226 NA
3 Southern GBR 0.0722 -0.0120 -0.0143 -0.0307 -0.00350 NA
Visualize Correlation as Heatmap
Code
region_cor_long <- region_cor_df %>%pivot_longer(-region, names_to ="Variable", values_to ="Correlation")p_region_cor <-ggplot(region_cor_long, aes(x = Variable, y = region, fill = Correlation)) +geom_tile(color ="white") +scale_fill_gradient2(low ="blue", mid ="white", high ="darkred", midpoint =0) +labs(title ="Spearman Correlation: Environmental Factors vs Predicted Bleaching",x ="Environmental Variable", y ="Region") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))p_region_cor